This workshop was mainly adapted from the mixOmics tutorials (http://mixomics.org) and the Bioconductor vignette (https://bioconductor.org/packages/release/bioc/html/mixOmics.html).
Suggested answers for the practice sections of the workshop are provided below.
library(mixOmics)
## Loading required package: MASS
## Loading required package: lattice
## Loading required package: ggplot2
##
## Loaded mixOmics 6.24.0
## Thank you for using mixOmics!
## Tutorials: http://mixomics.org
## Bookdown vignette: https://mixomicsteam.github.io/Bookdown
## Questions, issues: Follow the prompts at http://mixomics.org/contact-us
## Cite us: citation('mixOmics')
data(breast.TCGA)
# Extract training data and name each data frame
# There are 3 dataframes (3 omics of the same samples), stored as list
# In each dataframe, the rows represent samples.
# Columns represent feature measurments (genes, miRNA, protein)
X <- list(miRNA = breast.TCGA$data.train$mirna,
mRNA = breast.TCGA$data.train$mrna,
protein = breast.TCGA$data.train$protein)
# Vector of cancer subtype values, the experimental group
Y <- breast.TCGA$data.train$subtype
pca.miRNA <- pca(X$miRNA, ncomp=10, center = TRUE, scale = TRUE, logratio = "CLR")
plotIndiv(pca.miRNA, ind.names = FALSE, group = Y, ellipse = TRUE,
ellipse.level = 0.6,legend = TRUE,
legend.title = "Cancer\nSubtype", title = "PCA of miRNA data")
# removed CLR transformation for the protein dataset
pca.protein <- pca(X$protein, ncomp=10, center = TRUE, scale = TRUE)
plotIndiv(pca.protein, ind.names = FALSE, group = Y, ellipse = TRUE,
ellipse.level = 0.6,legend = TRUE,
legend.title = "Cancer\nSubtype", title = "PCA of protein data")
# check top loadings on component 1 for miRNA data
plotLoadings(pca.miRNA, comp = 1, ndisplay = 10)
plotLoadings(pca.miRNA, comp = 2, ndisplay = 10)
# check top loadings on component 1/2 for protein data
plotLoadings(pca.protein, comp = 1, ndisplay = 10)
plotLoadings(pca.protein, comp = 2, ndisplay = 10)
plotVar(pca.miRNA, cutoff=0.7)
plotVar(pca.protein, cutoff=0.7)
test.keepX <- round(2^seq(3,6,0.25))
test.keepX
## [1] 8 10 11 13 16 19 23 27 32 38 45 54 64
set.seed(123)
# Slow step:
tune.splsda.miRNA <- tune.splsda(X$miRNA, Y = Y, ncomp = 4, validation = "Mfold",
folds = 5, test.keepX = test.keepX,
dist = "centroids.dist",
scale = TRUE, logratio = "CLR",
nrepeat = 60, cpus = 3, progressBar = FALSE,
measure = "BER")
saveRDS(tune.splsda.miRNA, file = "tune.splsda.miRNA.rds")
tune.splsda.miRNA <- readRDS("tune.splsda.miRNA.rds")
plot(tune.splsda.miRNA)
tune.splsda.miRNA$choice.ncomp$ncomp
## [1] 4
tune.splsda.miRNA$choice.keepX
## comp1 comp2 comp3 comp4
## 54 64 64 54
# Tuning suggests 4 components with 54,64,64,64 features.
# We might prefer a simpler model with 2 componets (54,64) features.
tuned.splsda.miRNA <- splsda(X$miRNA, Y = Y, ncomp = 3, keepX = c(54, 64, 64))
plotIndiv(tuned.splsda.miRNA, title="Tuned sPLSDA miRNA",
ind.names = FALSE, ellipse = TRUE, ellipse.level = 0.6,
legend.title = "Cancer\nSubtype", legend = TRUE)
plotIndiv(tuned.splsda.miRNA, title="Tuned sPLSDA miRNA", comp = c(1,3),
ind.names = FALSE, ellipse = TRUE, ellipse.level = 0.6,
legend.title = "Cancer\nSubtype", legend = TRUE)
# 3d sample plot (not great for printing, but okay to have a look and consider the contribution of a third component)
# plotIndiv(tuned.splsda.miRNA, title="Tuned sPLSDA miRNA", comp = 1:3,
# style = "3d",
# ind.names = FALSE, ellipse = TRUE, ellipse.level = 0.6,
# legend.title = "Cancer\nSubtype", legend = TRUE)
Y.basal <- c("Other","Basal")[as.integer(Y=="Basal")+1]
Y.basal
## [1] "Basal" "Basal" "Basal" "Basal" "Basal" "Basal" "Basal" "Basal" "Basal"
## [10] "Basal" "Basal" "Basal" "Basal" "Basal" "Basal" "Basal" "Basal" "Basal"
## [19] "Basal" "Basal" "Basal" "Basal" "Basal" "Basal" "Basal" "Basal" "Basal"
## [28] "Basal" "Basal" "Basal" "Basal" "Basal" "Basal" "Basal" "Basal" "Basal"
## [37] "Basal" "Basal" "Basal" "Basal" "Basal" "Basal" "Basal" "Basal" "Basal"
## [46] "Other" "Other" "Other" "Other" "Other" "Other" "Other" "Other" "Other"
## [55] "Other" "Other" "Other" "Other" "Other" "Other" "Other" "Other" "Other"
## [64] "Other" "Other" "Other" "Other" "Other" "Other" "Other" "Other" "Other"
## [73] "Other" "Other" "Other" "Other" "Other" "Other" "Other" "Other" "Other"
## [82] "Other" "Other" "Other" "Other" "Other" "Other" "Other" "Other" "Other"
## [91] "Other" "Other" "Other" "Other" "Other" "Other" "Other" "Other" "Other"
## [100] "Other" "Other" "Other" "Other" "Other" "Other" "Other" "Other" "Other"
## [109] "Other" "Other" "Other" "Other" "Other" "Other" "Other" "Other" "Other"
## [118] "Other" "Other" "Other" "Other" "Other" "Other" "Other" "Other" "Other"
## [127] "Other" "Other" "Other" "Other" "Other" "Other" "Other" "Other" "Other"
## [136] "Other" "Other" "Other" "Other" "Other" "Other" "Other" "Other" "Other"
## [145] "Other" "Other" "Other" "Other" "Other" "Other"
table(Y.basal)
## Y.basal
## Basal Other
## 45 105
tune.splsda.mRNA.basal <- tune.splsda(X$mRNA, Y = Y.basal, ncomp = 3, validation = "Mfold",
folds = 5, test.keepX = test.keepX,
dist = "centroids.dist",
scale = TRUE, logratio = "CLR",
nrepeat = 60, cpus = 3, progressBar = FALSE,
measure = "BER")
saveRDS(tune.splsda.mRNA.basal, file = "tune.splsda.mRNA.basal.rds")
tune.splsda.mRNA.basal <- readRDS("tune.splsda.mRNA.basal.rds")
plot(tune.splsda.mRNA.basal)
tune.splsda.mRNA.basal$choice.keepX
## comp1 comp2 comp3
## 32 54 23
# ^ one component with 27 mRNA features looks great for this.
tuned.splsda.mRNA.basal <- splsda(X$mRNA, Y = Y.basal, ncomp = 1, keepX = 27)
plotLoadings(tuned.splsda.mRNA.basal, contrib = "max", method = "median",
size.name = 0.5, size.title = 1,
title = "Basal vs Other: mRNA signature")
# Prepare test set data
data.test.miRNA <- breast.TCGA$data.test$mirna
Y.test <- breast.TCGA$data.test$subtype
table(Y.test)
## Y.test
## Basal Her2 LumA
## 21 14 35
predict.miRNA <- predict(tuned.splsda.miRNA, newdata = data.test.miRNA,
dist = "centroids.dist")
confusion.mat.miRNA <- get.confusion_matrix(truth = Y.test,
predicted = predict.miRNA$MajorityVote$centroids.dist[,3])
confusion.mat.miRNA
## predicted.as.Basal predicted.as.Her2 predicted.as.LumA
## Basal 18 3 0
## Her2 0 13 1
## LumA 0 4 31
get.BER(confusion.mat.miRNA)
## [1] 0.1095238
# 11% balanced error rate on independent cases,
# using miRNA sPLSDA model (3 components, keepX=c(54,64,64))
# how about a simpler model with 2 components:
tuned.splsda.miRNA.2 <- splsda(X$miRNA, Y = Y, ncomp = 2, keepX = c(54, 64))
predict.miRNA <- predict(tuned.splsda.miRNA.2, newdata = data.test.miRNA,
dist = "centroids.dist")
confusion.mat.miRNA <- get.confusion_matrix(truth = Y.test,
predicted = predict.miRNA$MajorityVote$centroids.dist[,2])
confusion.mat.miRNA
## predicted.as.Basal predicted.as.Her2 predicted.as.LumA
## Basal 17 4 0
## Her2 1 13 0
## LumA 0 3 32
get.BER(confusion.mat.miRNA)
## [1] 0.115873
# 11.5% BER is a worse performance,
# i.e. 3 miRNA components was better for miRNA predictive value
shrink.rcc.miRNA.mRNA <- rcc(X$miRNA,X$mRNA, method = 'shrinkage')
# examine the optimal lambda values after shrinkage
shrink.rcc.miRNA.mRNA$lambda
## lambda1 lambda2
## 0.1098423 0.1210357
plot(shrink.rcc.miRNA.mRNA, type = "barplot", main = "Shrinkage component plot")
plotArrow(shrink.rcc.miRNA.mRNA, group = Y,
col.per.group = color.mixo(1:3),
title = 'rCCA', legend.title = "Cancer\nSubtype")
plotVar(shrink.rcc.miRNA.mRNA, var.names = c(FALSE, TRUE),
cex = c(4, 4), cutoff = 0.6, col = c("black", "black"), overlap = FALSE,
title = 'rCCA miRNA + mRNA')
network(shrink.rcc.miRNA.mRNA, comp = 1, interactive = FALSE,
lwd.edge = 2,graph.scale = 0.5,
cutoff = 0.6, color.node = color.mixo(4:5),
save = "png", name.save = "miRNA.mRNA.network.comp1")
network(shrink.rcc.miRNA.mRNA, comp = 2, interactive = FALSE,
lwd.edge = 2, graph.scale = 0.3,
cutoff = 0.45, color.node = color.mixo(4:5),
save = "png", name.save = "miRNA.mRNA.network.comp2")
cim(shrink.rcc.miRNA.mRNA, cutoff = 0.5, comp = 1:2,
xlab = "mRNA", ylab = "miRNA",
col.cex = 0.5, row.cex=0.5, margins=c(6,5),
save = "png", name.save = "rcc.miRNA.mRNA.cim")
spls.mRNA.protein <- spls(X = X$mRNA, Y = X$protein, ncomp = 5, mode = 'regression')
perf.spls.mRNA.protein <- perf(spls.mRNA.protein, validation = 'Mfold',
folds = 6, nrepeat = 5)
plot(perf.spls.mRNA.protein, criterion = 'Q2.total')
list.keepX <- 2^(3:7)
list.keepY <- c(3:10)
tune.spls.miRNA.RNA <- tune.spls(X$miRNA, X$mRNA, ncomp = 2,
test.keepX = list.keepX,
test.keepY = list.keepY,
nrepeat = 1, folds = 6,
mode = 'regression', measure = 'cor')
plot(tune.spls.miRNA.RNA) # use the correlation measure for tuning
optimal.keepX <- tune.spls.miRNA.RNA$choice.keepX
optimal.keepY <- tune.spls.miRNA.RNA$choice.keepY
final.spls.mRNA.protein <- spls(X$mRNA, X$protein, ncomp = 2,
keepX = optimal.keepX,
keepY = optimal.keepY,
mode = "regression") # explanatory approach being used,
# hence use regression mode
plotArrow(final.spls.mRNA.protein, rep.space = "XY-variate",
ind.names = FALSE, group = Y, legend=TRUE, title = "sPLS mRNA+protein",
legend.title = "Cancer\nSubtype")
color.edge <- color.GreenRed(50) # set the colours of the connecting lines
network(final.spls.mRNA.protein, comp = 1:2,
cutoff = 0.5,
shape.node = c("rectangle", "circle"),
color.node = color.mixo(5:6),
size.node = 0.3,
color.edge = color.edge,
save = 'png',
name.save = 'sPLS.mRNA.protein.network')
cim(final.spls.mRNA.protein, comp = 1:2, xlab = "Protein", ylab = "mRNA",
margins = c(6,8),
cutoff = 0.5, save = "png", name.save = "sPLS.mRNA.protein.cim")
practice.spls.mRNA.GATA3 <- spls(X$mRNA, X$protein[,"GATA3"], ncomp = 1,
keepX = 8,
keepY = 1,
mode = "canonical")
plotLoadings(practice.spls.mRNA.GATA3, block = "X")
# KDM4B has strong positive association to GATA3, as expected (https://doi.org/10.1093/nar/gkt469)
# CSRP2 has strong negative association to GATA3
selectVar(practice.spls.mRNA.GATA3)$X$name
## [1] "KDM4B" "CSRP2" "ZNF552" "SLC43A3" "PREX1" "FMNL2" "FUT8"
## [8] "LRIG1"
cim(practice.spls.mRNA.GATA3, comp = 1, xlab = "mRNA", ylab = "Samples",
margins = c(5,4),
cutoff = 0, save = "png", name.save = "sPLS.mRNA.GATA3.cim")
# set "cohort2": n=70 samples with miRNA and mRNA data
X.cohort2 <- list(miRNA = breast.TCGA$data.test$mirna,
mRNA = breast.TCGA$data.test$mrna)
Y.cohort2 <- breast.TCGA$data.test$subtype
lapply(X.cohort2, FUN = dim)
## $miRNA
## [1] 70 184
##
## $mRNA
## [1] 70 200
length(Y.cohort2)
## [1] 70
# design matrix, the relationship between datasets
design.cohort2 <- matrix(0.1, ncol = length(X.cohort2), nrow = length(X.cohort2),
dimnames = list(names(X.cohort2), names(X.cohort2)))
diag(design.cohort2) <- 0
design.cohort2
## miRNA mRNA
## miRNA 0.0 0.1
## mRNA 0.1 0.0
diablo.cohort2 <- block.plsda(X.cohort2,
Y.cohort2,
ncomp = 5,
design = design.cohort2)
## Design matrix has changed to include Y; each block will be
## linked to Y.
set.seed(123) # For reproducibility, remove for your analyses
perf.diablo.cohort2 = perf(diablo.cohort2, validation = 'Mfold',
folds = 6, nrepeat = 30, cpus = 3)
plot(perf.diablo.cohort2)
perf.diablo.cohort2$MajorityVote
## list()
ncomp <- perf.diablo.cohort2$choice.ncomp$WeightedVote["Overall.BER", "centroids.dist"]
# result suggests to use 3 components
#ncomp <- 2 # (we could consider choosing a low number of components than suggested)
ncomp
## [1] 3
# testing range, for the number of features to select
test.keepX <- list(miRNA = c(5:7, round(2^(seq(3,5,0.5)))),
mRNA = c(5:7, round(2^(seq(3,5,0.5)))))
set.seed(123) # for reproducibility
tune.diablo.cohort2 <- tune.block.splsda(X.cohort2,
Y.cohort2, ncomp = ncomp,
test.keepX = test.keepX, design = design.cohort2,
validation = 'Mfold', folds = 6, nrepeat = 3,
BPPARAM = BiocParallel::SnowParam(workers = 3),
dist = "centroids.dist")
## Design matrix has changed to include Y; each block will be
## linked to Y.
##
## You have provided a sequence of keepX of length: 8 for block miRNA and 8 for block mRNA.
## This results in 64 models being fitted for each component and each nrepeat, this may take some time to run, be patient!
ncomp.cohort2 <- tune.diablo.cohort2$choice.ncomp$ncomp # suggests 3 components per block
keepX.cohort2 <- tune.diablo.cohort2$choice.keepX # suggests keep 5,5,5 and 23,32,11 features
# could also manually input the values (if resuming from this point)
# opt.ncomp.cohort2 <- 3
# opt.keepX.cohort2 <- list(miRNA = c(5,5,5),
# mRNA = c(23,32,11))
# make a diablo integrated model, sparse feature selection
diablo.cohort2 <- block.splsda(X = X.cohort2,
Y = Y.cohort2,
keepX = keepX.cohort2,
ncomp = ncomp.cohort2,
design = design.cohort2)
## Design matrix has changed to include Y; each block will be
## linked to Y.
# obtain the selected features and weights
selectVar(diablo.cohort2, block = 'miRNA', comp = 1)
## $miRNA
## $miRNA$name
## [1] "hsa-mir-500a" "hsa-mir-532" "hsa-mir-501" "hsa-mir-505" "hsa-mir-130b"
##
## $miRNA$value
## value.var
## hsa-mir-500a 0.6457262
## hsa-mir-532 0.5547329
## hsa-mir-501 0.3568440
## hsa-mir-505 0.3256011
## hsa-mir-130b 0.2048303
##
##
## $comp
## [1] 1
selectVar(diablo.cohort2, block = 'mRNA', comp = 1)
## $mRNA
## $mRNA$name
## [1] "ZNF552" "SLC43A3" "LRIG1" "CCNA2" "KDM4B" "PREX1" "STC2"
## [8] "C4orf34" "PLCD4" "TTC39A" "KIF13B" "E2F1" "NCOA7" "FAM63A"
## [15] "HN1" "PLSCR1" "HTRA1" "SLC5A6" "FMNL2" "LYN" "ASPM"
## [22] "ILDR1" "IFITM2"
##
## $mRNA$value
## value.var
## ZNF552 -0.50992767
## SLC43A3 0.35895241
## LRIG1 -0.33183812
## CCNA2 0.27389658
## KDM4B -0.26725842
## PREX1 -0.23349891
## STC2 -0.22267161
## C4orf34 -0.19222356
## PLCD4 -0.19200410
## TTC39A -0.16507411
## KIF13B -0.16159680
## E2F1 0.15721232
## NCOA7 0.14641467
## FAM63A -0.14545800
## HN1 0.12025827
## PLSCR1 0.10814636
## HTRA1 -0.09920337
## SLC5A6 0.09353720
## FMNL2 0.07625932
## LYN 0.05408349
## ASPM 0.03337284
## ILDR1 -0.02768450
## IFITM2 -0.02452071
##
##
## $comp
## [1] 1
# if you want to export all selected features to csv:
cohort2.features.list <- list()
for (i in 1:diablo.cohort2$ncomp[1]) {
cohort2.features.list[[i]] <- data.frame(
name=c(selectVar(diablo.cohort2, comp=i)$miRNA$name,
selectVar(diablo.cohort2, comp=i)$mRNA$name),
value=c(selectVar(diablo.cohort2,comp = i)$miRNA$value$value.var,
selectVar(diablo.cohort2,comp = i)$mRNA$value$value.var),
block=c(rep("miRNA",
length(selectVar(diablo.cohort2, comp=i)$miRNA$name)),
rep("mRNA",
length(selectVar(diablo.cohort2, comp=i)$mRNA$name))),
comp=i)
}
cohort2.features.list
## [[1]]
## name value block comp
## 1 hsa-mir-500a 0.64572618 miRNA 1
## 2 hsa-mir-532 0.55473287 miRNA 1
## 3 hsa-mir-501 0.35684396 miRNA 1
## 4 hsa-mir-505 0.32560106 miRNA 1
## 5 hsa-mir-130b 0.20483034 miRNA 1
## 6 ZNF552 -0.50992767 mRNA 1
## 7 SLC43A3 0.35895241 mRNA 1
## 8 LRIG1 -0.33183812 mRNA 1
## 9 CCNA2 0.27389658 mRNA 1
## 10 KDM4B -0.26725842 mRNA 1
## 11 PREX1 -0.23349891 mRNA 1
## 12 STC2 -0.22267161 mRNA 1
## 13 C4orf34 -0.19222356 mRNA 1
## 14 PLCD4 -0.19200410 mRNA 1
## 15 TTC39A -0.16507411 mRNA 1
## 16 KIF13B -0.16159680 mRNA 1
## 17 E2F1 0.15721232 mRNA 1
## 18 NCOA7 0.14641467 mRNA 1
## 19 FAM63A -0.14545800 mRNA 1
## 20 HN1 0.12025827 mRNA 1
## 21 PLSCR1 0.10814636 mRNA 1
## 22 HTRA1 -0.09920337 mRNA 1
## 23 SLC5A6 0.09353720 mRNA 1
## 24 FMNL2 0.07625932 mRNA 1
## 25 LYN 0.05408349 mRNA 1
## 26 ASPM 0.03337284 mRNA 1
## 27 ILDR1 -0.02768450 mRNA 1
## 28 IFITM2 -0.02452071 mRNA 1
##
## [[2]]
## name value block comp
## 1 hsa-mir-210 0.691663801 miRNA 2
## 2 hsa-mir-30a -0.658905023 miRNA 2
## 3 hsa-mir-101-2 -0.278946398 miRNA 2
## 4 hsa-mir-30c-2 -0.090035710 miRNA 2
## 5 hsa-mir-101-1 -0.039087532 miRNA 2
## 6 NDRG2 -0.433755926 mRNA 2
## 7 EGLN3 0.377357162 mRNA 2
## 8 SDC1 0.277822044 mRNA 2
## 9 ZNF37B -0.261752330 mRNA 2
## 10 TP53INP2 0.261348927 mRNA 2
## 11 ALCAM 0.220768800 mRNA 2
## 12 WWC2 0.204915471 mRNA 2
## 13 ICAM2 -0.204456064 mRNA 2
## 14 ACADS -0.201529580 mRNA 2
## 15 C4orf34 0.201490928 mRNA 2
## 16 STAT5A -0.201327875 mRNA 2
## 17 ASPH 0.173648063 mRNA 2
## 18 TOB1 0.167600361 mRNA 2
## 19 BOC -0.162058854 mRNA 2
## 20 DBP -0.152383311 mRNA 2
## 21 PLEKHA4 -0.144868552 mRNA 2
## 22 TRIM45 -0.138129580 mRNA 2
## 23 CDK18 -0.107049682 mRNA 2
## 24 FZD4 0.106972519 mRNA 2
## 25 TIGD5 -0.083797631 mRNA 2
## 26 NES -0.074160168 mRNA 2
## 27 IL1R1 0.065884648 mRNA 2
## 28 PSIP1 -0.064468692 mRNA 2
## 29 CPPED1 0.055352187 mRNA 2
## 30 EPHB3 -0.051344666 mRNA 2
## 31 CPT1A 0.039648852 mRNA 2
## 32 FAM63A -0.032676916 mRNA 2
## 33 MTL5 -0.025132140 mRNA 2
## 34 CDCP1 0.015348090 mRNA 2
## 35 S1PR3 0.011992026 mRNA 2
## 36 FLJ23867 -0.009054893 mRNA 2
## 37 YPEL2 0.005398189 mRNA 2
##
## [[3]]
## name value block comp
## 1 hsa-mir-199a-1 0.62323146 miRNA 3
## 2 hsa-mir-199a-2 0.56554380 miRNA 3
## 3 hsa-mir-199b 0.46810016 miRNA 3
## 4 hsa-mir-374a -0.26534076 miRNA 3
## 5 hsa-mir-455 0.04710928 miRNA 3
## 6 POSTN 0.62334841 mRNA 3
## 7 FUT8 0.56936573 mRNA 3
## 8 FRMD6 0.38499141 mRNA 3
## 9 AKAP9 0.21348425 mRNA 3
## 10 TCF4 0.20536352 mRNA 3
## 11 DSG2 0.12269013 mRNA 3
## 12 CCDC80 0.11720266 mRNA 3
## 13 DNLZ -0.10115086 mRNA 3
## 14 COL6A1 0.09396975 mRNA 3
## 15 C7orf55 -0.05067405 mRNA 3
## 16 ZEB1 0.02953547 mRNA 3
cohort2.features.dataframe <- rbind(cohort2.features.list[[1]],
cohort2.features.list[[2]],
cohort2.features.list[[3]])
write.csv(cohort2.features.dataframe,
file = "cohort2.diablo.features.csv")
# diablo plots
plotDiablo(diablo.cohort2, ncomp = 1)
plotDiablo(diablo.cohort2, ncomp = 2)
# sample plots
plotIndiv(diablo.cohort2, ind.names = FALSE, legend = TRUE, comp=1:2,
title = 'Cohort2, DIABLO comp 1 - 2')
# arrow plots
plotArrow(diablo.cohort2, ind.names = FALSE, legend = TRUE,
legend.title = "Cancer\nSubtype", comp=1:2,
title = "Cohort2, DIABLO comp 1 - 2")
# prepare test data (cohort 1)
data.test.cohort1 <- list(miRNA = breast.TCGA$data.train$mirna,
mRNA = breast.TCGA$data.train$mrna)
# predict the class of cohort1 samples, using the DIABLO model of cohort2
predict.cohort1 <- predict(diablo.cohort2, newdata = data.test.cohort1,
dist = "centroids.dist")
table(predict.cohort1$WeightedVote$centroids.dist[,3])
##
## Basal Her2 LumA
## 45 28 77
confusion.mat.cohort1 <- get.confusion_matrix(truth = breast.TCGA$data.train$subtype,
predicted = predict.cohort1$WeightedVote$centroids.dist[,3])
confusion.mat.cohort1
## predicted.as.Basal predicted.as.Her2 predicted.as.LumA
## Basal 43 2 0
## Her2 2 24 4
## LumA 0 2 73
get.BER(confusion.mat.cohort1)
## [1] 0.09037037